In [1]:
# Plotting-related Python library
import matplotlib.pyplot as plt
# Standard Python CSV library
import csv
# Main Python library for numerical calculations
import numpy as np
# Python libraries for manipulating dates and times as objects
import time
import datetime
import dateutil.parser  # Import the parser submodule explicitly so dateutil.parser.parse works
In [ ]:
def correlation_coefficient(xdata, ydata):
    # Calculates the Pearson correlation coefficient between the two data sets
    # 'xdata' and 'ydata' represent the two data sets being compared
    xmean = np.mean(xdata)  # Calculate the average of each data set
    ymean = np.mean(ydata)
    xsigma = np.sqrt(np.var(xdata))  # Calculate the standard deviation of each data set
    ysigma = np.sqrt(np.var(ydata))
    xysums = 0
    for i in range(len(xdata)):  # Calculate the sum of the products of the deviations
        xdiff = xdata[i] - xmean
        ydiff = ydata[i] - ymean
        xysums = xdiff * ydiff + xysums
    stnddevs = xsigma * ysigma
    coeff = xysums / stnddevs / len(xdata)
    return coeff
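For reference, this function computes the standard Pearson correlation coefficient, written with population standard deviations to match np.var above:

$$ r = \frac{1}{N}\sum_{i=1}^{N}\frac{(x_i - \bar{x})(y_i - \bar{y})}{\sigma_x\,\sigma_y} $$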
In [ ]:
#Opens csv files chosen by the user
user_file1 = input("File Name 1: ")
resultsa = csv.reader(open(user_file1), delimiter=',')
user_file2 = input("File Name 2: ")
resultsb = csv.reader(open(user_file2), delimiter=',')
In [ ]:
timesa = []
timesb = []
Val25a = []
Val25b = []
row_countera = 0
for r in resultsa:
    #Skip the first row (the header row that names the fields)
    row_countera += 1
    if row_countera > 1:
        #Append each column of interest in the CSV to a separate list
        timesa.append(dateutil.parser.parse(r[0]))
        Val25a.append(int(r[8]))
row_counterb = 0
for r in resultsb:
    row_counterb += 1
    if row_counterb > 1:
        timesb.append(dateutil.parser.parse(r[0]))
        Val25b.append(int(r[8]))
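The row-counter pattern above works; a more idiomatic sketch (under the same column assumptions: timestamp in column 0, particle count in column 8) uses a with-block so the file is closed automatically and next() to consume the header:
In [ ]:
# Alternative header handling (illustrative): re-open File 1, skip the
# header row with next(), and build the lists in one pass
with open(user_file1) as f:
    reader = csv.reader(f, delimiter=',')
    next(reader)  # consume the header row
    times_alt, vals_alt = [], []
    for row in reader:
        times_alt.append(dateutil.parser.parse(row[0]))
        vals_alt.append(int(row[8]))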
In [ ]:
#Choose the number of data points to combine and average
n_merge = int(input("n data points to combine: "))
ndata_a = len(Val25a)
ndata_b = len(Val25b)
nsum_data_a = int(ndata_a / n_merge)
nsum_data_b = int(ndata_b / n_merge)
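Integer division here means any leftover points that do not fill a complete group are dropped; for example (made-up numbers), 1003 points merged in groups of 10 give 100 averaged points, with the last 3 raw points ignored:
In [ ]:
# Illustration with made-up numbers: how many full groups fit,
# and how many trailing points are dropped
ngroups, leftover = divmod(1003, 10)
print(ngroups, leftover)  # -> 100 3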
In [ ]:
data_ave_a = []
data_ave_b = []
data_unc_a = []
data_unc_b = []
merge_times_a = []
merge_times_b = []
for i in range(nsum_data_a):
    #Calculate the mean and standard deviation of each group of n_merge points
    idata = Val25a[i*n_merge:(i+1)*n_merge]
    idata_array = np.asarray(idata)  #Convert 'idata' to a numpy array
    aqmean = np.mean(idata_array)
    aqsigma = np.sqrt(np.var(idata_array))
    data_ave_a.append(aqmean)
    data_unc_a.append(aqsigma)
    itimes = timesa[i*n_merge:(i+1)*n_merge]
    itime = itimes[int(len(itimes)/2)]  #Use the middle timestamp of the group
    merge_times_a.append(itime)
for i in range(nsum_data_b):
    #Same calculation for the second data set
    idata = Val25b[i*n_merge:(i+1)*n_merge]
    idata_array = np.asarray(idata)
    aqmean = np.mean(idata_array)
    aqsigma = np.sqrt(np.var(idata_array))
    data_ave_b.append(aqmean)
    data_unc_b.append(aqsigma)
    itimes = timesb[i*n_merge:(i+1)*n_merge]
    itime = itimes[int(len(itimes)/2)]
    merge_times_b.append(itime)
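The two loops can also be written as a vectorized sketch with NumPy's reshape, under the same assumption that leftover points are dropped:
In [ ]:
# Vectorized equivalent for File 1 (illustrative): truncate to a whole
# number of groups, reshape to (groups, n_merge), and reduce along rows
vals_a = np.asarray(Val25a[:nsum_data_a * n_merge], dtype=float)
blocks_a = vals_a.reshape(nsum_data_a, n_merge)
data_ave_a_vec = blocks_a.mean(axis=1)  # matches data_ave_a
data_unc_a_vec = blocks_a.std(axis=1)   # matches data_unc_a (population std)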
In [ ]:
fig = plt.figure()
#Plot the first graph: the two data sets as separate series
plt.plot(merge_times_a, data_ave_a, "b.", label='File 1')
plt.plot(merge_times_b, data_ave_b, "g.", label='File 2')
plt.legend(loc="best")
plt.xlabel("Time")
plt.ylabel("Particle Concentration 2.5")
file_title = "Air Quality Test Results"
plt.title(file_title)
fig.autofmt_xdate()
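The standard deviations stored in data_unc_a and data_unc_b above are never drawn; a sketch with plt.errorbar overlays them as vertical error bars:
In [ ]:
# Optional: same time-series plot with the stored standard deviations
# shown as error bars (figure 3 avoids colliding with figure 2 below)
fig_err = plt.figure(3)
plt.errorbar(merge_times_a, data_ave_a, yerr=data_unc_a, fmt="b.", label='File 1')
plt.errorbar(merge_times_b, data_ave_b, yerr=data_unc_b, fmt="g.", label='File 2')
plt.legend(loc="best")
plt.xlabel("Time")
plt.ylabel("Particle Concentration 2.5")
fig_err.autofmt_xdate()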
In [ ]:
#Calculate the correlation coefficient of the data from the two sensors
data_arrayA = np.asarray(data_ave_a)
data_arrayB = np.asarray(data_ave_b)
#If one sensor logged a few extra data points, trim both arrays to the common length
npoints = min(len(data_arrayA), len(data_arrayB))
data_arrayA = data_arrayA[:npoints]
data_arrayB = data_arrayB[:npoints]
corr_coeff = correlation_coefficient(data_arrayA, data_arrayB)
corr_statement = 'Correlation coefficient = ' + str(corr_coeff)
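As a quick cross-check, NumPy's built-in np.corrcoef should agree closely with the hand-rolled function on the trimmed arrays:
In [ ]:
# Cross-check: np.corrcoef returns a 2x2 matrix; the off-diagonal
# entry is the Pearson correlation coefficient
print(np.corrcoef(data_arrayA, data_arrayB)[0, 1])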
In [ ]:
#Graph the correlation between the sensors
plt.figure(2)
plt.plot(data_arrayA, data_arrayB, "b.")
plt.xlabel("Sensor 1")
plt.ylabel("Sensor 2")
file_title2 = "AQ Sensor Correlation"
plt.title(file_title2)
#Print correlation coefficient
print(corr_statement)
plt.show()